In [1]:
# Import packages
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import cm as CM
from matplotlib import mlab as ML
import pandas as pd
import numpy as np
import itertools
from scipy import stats
from scipy.stats import poisson
from scipy.stats import lognorm
from decimal import *
from mpl_toolkits.mplot3d import Axes3D
from pylab import *
import sklearn.mixture
import scipy.stats as ss
import seaborn as sns
In [2]:
np.random.seed(12345678) # for reproducibility, set random seed
# Read in data
df = pd.read_csv('../output.csv')
nvox = 64*64*48 # assume number of voxels per bin
df['weighted'] = df['synapses']/df['unmasked']*nvox
df.head()
Out[2]:
In [3]:
xvals = df['cx'].unique()
yvals = df['cy'].unique()
zvals = df['cz'].unique()
# Get rid of the blank edges, Z-layer by Z-layer.
left = 0;
right = len(xvals);
top = 0;
bottom = len(yvals);
for z in zvals:
this_z = df[df['cz']==z]
# X direction
xhist, bin_edges = np.histogram(this_z['cx'], weights = this_z['unmasked']/(nvox*len(yvals)), bins=len(xvals))
left = max(left, np.argmax(xhist>0.5))
right = min(right, len(xvals)-np.argmax(xhist[::-1]>0.5))
# Y direction
yhist, bin_edges = np.histogram(this_z['cy'], weights = this_z['unmasked']/(nvox*len(xvals)), bins=len(yvals))
top = max(top, np.argmax(yhist>0.5))
bottom = min(bottom, len(yvals)-np.argmax(yhist[::-1]>0.5))
# Copy new dataset without edges
df2 = df.copy()
for z in zvals:
df2.drop(df2.index[(df2['cx']<xvals[left]) | (df2['cx']>=xvals[right])], inplace=True)
df2.drop(df2.index[(df2['cy']<yvals[top]) | (df2['cy']>=yvals[bottom])], inplace=True)
print "There are", len(df2), "bins after removing the edges."
df2.head()
Out[3]:
In [4]:
xvals = df2['cx'].unique()
yvals = df2['cy'].unique()
zvals = df2['cz'].unique()
# Example X slices after removing edges
x = xvals[0]
Xslice = df2[df2['cx']==x].pivot_table(index='cz', columns='cy', values='weighted', aggfunc=np.sum)
plt.figure()
sns.heatmap(Xslice, xticklabels=10, yticklabels=3,
cbar_kws={'label': 'Weighted synapse count'});
plt.title('X = '+str(x)+' slice with edges removed');
x = xvals[-1]
Xslice = df2[df2['cx']==x].pivot_table(index='cz', columns='cy', values='weighted', aggfunc=np.sum)
plt.figure()
sns.heatmap(Xslice, xticklabels=10, yticklabels=3,
cbar_kws={'label': 'Weighted synapse count'});
plt.title('X = '+str(x)+' slice with edges removed');
# Example Y slices after removing edges
y = yvals[0]
Yslice = df2[df2['cy']==y].pivot_table(index='cz', columns='cx', values='weighted', aggfunc=np.sum)
plt.figure()
sns.heatmap(Yslice, xticklabels=20, yticklabels=3,
cbar_kws={'label': 'Weighted synapse count'});
plt.title('Y = '+str(y)+' slice with edges removed');
y = yvals[-1]
Yslice = df2[df2['cy']==y].pivot_table(index='cz', columns='cx', values='weighted', aggfunc=np.sum)
plt.figure()
sns.heatmap(Yslice, xticklabels=20, yticklabels=3,
cbar_kws={'label': 'Weighted synapse count'});
plt.title('Y = '+str(y)+' slice with edges removed');
# Example Z slices after removing edges
z = zvals[0]
Zslice = df2[df2['cz']==z].pivot_table(index='cy', columns='cx', values='weighted', aggfunc=np.sum)
plt.figure()
sns.heatmap(Zslice, xticklabels=20, yticklabels=10,
cbar_kws={'label': 'Weighted synapse count'});
plt.title('Z = '+str(z)+' slice with edges removed');
z = zvals[-1]
Zslice = df2[df2['cz']==z].pivot_table(index='cy', columns='cx', values='weighted', aggfunc=np.sum)
plt.figure()
sns.heatmap(Zslice, xticklabels=20, yticklabels=10,
cbar_kws={'label': 'Weighted synapse count'});
plt.title('Z = '+str(z)+' slice with edges removed');
In [5]:
plt.figure()
plt.hist(df2['cx'], weights=df2['weighted'], bins=xvals.size, color='g', edgecolor='none')
plt.title('Distribution of Weighted Synapses along X - coordinate')
plt.xlabel('X - coordinate')
plt.ylabel('Count')
plt.show()
plt.figure()
plt.hist(df2['cy'], weights=df2['weighted'], bins=yvals.size, color='g', edgecolor='none')
plt.title('Distribution of Weighted Synapses along Y - coordinate')
plt.xlabel('Y - coordinate')
plt.ylabel('Count')
plt.show()
plt.figure()
plt.hist(df2['cz'], weights=df2['weighted'], bins=zvals.size, color='g', edgecolor='none')
plt.title('Distribution of Weighted Synapses along Z - coordinate')
plt.xlabel('Z - coordinate')
plt.ylabel('Count')
plt.show()
In [6]:
sumXY = pd.pivot_table(df2, index='cy', columns='cx', values='weighted', aggfunc=np.sum)
sumXZ = pd.pivot_table(df2, index='cz', columns='cx', values='weighted', aggfunc=np.sum)
sumYZ = pd.pivot_table(df2, index='cz', columns='cy', values='weighted', aggfunc=np.sum)
plt.figure()
sns.heatmap(sumXY, xticklabels=20, yticklabels=10, cbar_kws={'label': 'Weighted'});
plt.title('Number of Weighted Synapses at X-Y coordinates');
plt.figure()
sns.heatmap(sumXZ, xticklabels=20, yticklabels=2, cbar_kws={'label': 'Weighted'});
plt.title('Number of Weighted Synapses at X-Z coordinates');
plt.figure()
sns.heatmap(sumYZ, xticklabels=10, yticklabels=2, cbar_kws={'label': 'Weighted'});
plt.title('Number of Weighted Synapses at Y-Z coordinates');
In [7]:
sns.set_style('white')
wt_max = df2['weighted'].max()
cmap = np.array(sns.cubehelix_palette(256))
cmap = np.hstack((cmap, np.linspace(0,1,256).reshape(-1,1)))
subsamp = np.random.choice(len(df2), size=len(df2)/9, replace=False) # reduce resolution
idx = df2.index[subsamp]
colors = [cmap[np.rint(255*(df2.ix[i]['weighted']/wt_max)).astype(int)] for i in idx]
ax = plt.figure().gca(projection='3d')
patches = ax.scatter(xs=df2.ix[idx]['cx'], ys=df2.ix[idx]['cy'], zs=df2.ix[idx]['cz'],
s=20*(df2.ix[idx]['weighted']/wt_max), c=colors,
edgecolors=None)
plt.xlabel('X')
plt.ylabel('Y')
plt.gca().set_zlabel('Z')
plt.show()
In [8]:
sns.set_style('white')
thresh = 250
wt_max = df2['weighted'].max()
cmap = np.array(sns.cubehelix_palette(256))
cmap = np.hstack((cmap, np.linspace(0,1,256).reshape(-1,1)))
df2thresh = df2[df2['weighted']>thresh] # high density data
subsamp = np.random.choice(len(df2thresh), size=len(df2thresh)/4, replace=False) # reduce resolution
idx = df2thresh.index[subsamp]
colors = [cmap[np.rint(255*(df2.ix[i]['weighted']/wt_max)).astype(int)] for i in idx]
ax = plt.figure().gca(projection='3d')
patches = ax.scatter(xs=df2.ix[idx]['cx'], ys=df2.ix[idx]['cy'], zs=df2.ix[idx]['cz'],
s=40*((df2.ix[idx]['weighted']-thresh)/(wt_max-thresh)), c=colors,
edgecolors=None)
plt.xlabel('X')
plt.ylabel('Y')
plt.gca().set_zlabel('Z')
plt.show()
In [9]:
sns.set_style('white')
thresh = 400
wt_max = df2['weighted'].max()
cmap = np.array(sns.cubehelix_palette(256))
cmap = np.hstack((cmap, np.linspace(0,1,256).reshape(-1,1)))
df2thresh = df2[df2['weighted']>thresh] # high density data
idx = df2thresh.index
colors = [cmap[np.rint(255*(df2.ix[i]['weighted']/wt_max)).astype(int)] for i in idx]
ax = plt.figure().gca(projection='3d')
patches = ax.scatter(xs=df2.ix[idx]['cx'], ys=df2.ix[idx]['cy'], zs=df2.ix[idx]['cz'],
s=50*((df2.ix[idx]['weighted']-thresh)/(wt_max-thresh)), c=colors,
edgecolors=None)
plt.xlabel('X')
plt.ylabel('Y')
plt.gca().set_zlabel('Z')
plt.show()
In [10]:
sns.set_style('darkgrid')
step = 20
bin_edges = np.arange(0,np.ceil(df2['weighted'].max()/step)*step+1,step)
plt.figure()
df2['weighted'].hist(bins=bin_edges)
plt.xlabel('Weighted number of synapses')
plt.ylabel('Count')
plt.show()
In [11]:
mw = df2['weighted'].mean()
print "The mean weighted number of synapses is", mw
expected = [mw]*len(df2)
(chi2,p) = ss.chisquare(df2['weighted'], f_exp=expected, ddof=1)
print "chi-squared statistic: ", chi2
print "p-value: ", p
Since the p-value is very small, we reject the null hypothesis that synapses are uniformly distributed with 241.5 synapses per bin on average.
In [12]:
plt.figure()
df2.boxplot(column='weighted', return_type='dict')
plt.show()
In [13]:
from matplotlib.colors import LogNorm
S = np.array([
pd.pivot_table(df2[df2['cx']==x], index='cy', columns='cz', values='weighted', aggfunc=np.sum).values
for x in xvals
])
# print pd.pivot_table(df2[df2['cx']==x], index='cy', columns='cz', values='weighted', aggfunc=np.sum).head()
FS = np.fft.fftn(S)
PSD = np.abs(np.fft.fftshift(FS))**2
# Example XY slices
plt.figure()
data = PSD[:,:,0].T
sns.heatmap(data, xticklabels=False, yticklabels=False,
norm=LogNorm(vmin=data.min(), vmax=data.max()),
cbar_kws={'label': 'PSD'})
plt.xlabel('X freq')
plt.ylabel('Y freq')
plt.show()
plt.figure()
data = PSD[:,:,-1].T
sns.heatmap(data, xticklabels=False, yticklabels=False,
norm=LogNorm(vmin=data.min(), vmax=data.max()),
cbar_kws={'label': 'PSD'})
plt.xlabel('X freq')
plt.ylabel('Y freq')
plt.show()
# Example XZ slices
plt.figure()
data = PSD[:,0,:].T
sns.heatmap(data, xticklabels=False, yticklabels=False,
norm=LogNorm(vmin=data.min(), vmax=data.max()),
cbar_kws={'label': 'PSD'})
plt.xlabel('X freq')
plt.ylabel('Z freq')
plt.show()
plt.figure()
data = PSD[:,-1,:].T
sns.heatmap(data, xticklabels=False, yticklabels=False,
norm=LogNorm(vmin=data.min(), vmax=data.max()),
cbar_kws={'label': 'PSD'})
plt.xlabel('X freq')
plt.ylabel('Z freq')
plt.show()
# Example YZ slices
plt.figure()
data = PSD[0,:,:].T
sns.heatmap(data, xticklabels=False, yticklabels=False,
norm=LogNorm(vmin=data.min(), vmax=data.max()),
cbar_kws={'label': 'PSD'})
plt.xlabel('Y freq')
plt.ylabel('Z freq')
plt.show()
plt.figure()
data = PSD[-1,:,:].T
sns.heatmap(data, xticklabels=False, yticklabels=False,
norm=LogNorm(vmin=data.min(), vmax=data.max()),
cbar_kws={'label': 'PSD'})
plt.xlabel('Y freq')
plt.ylabel('Z freq')
plt.show()
In [68]:
# Read in data
df = pd.read_csv('../output.csv')
In [69]:
nvox = 64*64*48
dfcut = df[(df['cx'] > 400) & (df['cx'] < 3800) & (df['cy'] < 2800)]
dfftr = dfcut[dfcut['unmasked'] > nvox * 0.5]
sumXY = pd.pivot_table(dfftr, index='cy', columns='cx', values='synapses', aggfunc=np.sum)
sumXZ = pd.pivot_table(dfftr, index='cz', columns='cx', values='synapses', aggfunc=np.sum)
sumYZ = pd.pivot_table(dfftr, index='cz', columns='cy', values='synapses', aggfunc=np.sum)
import seaborn as sns
plt.figure()
sns.heatmap(sumXY, xticklabels=20, yticklabels=10, cbar_kws={'label': 'Synapses'}, cmap="YlGnBu");
plt.title('Number of Synapses at X-Y coordinates');
plt.figure()
sns.heatmap(sumXZ, xticklabels=20, yticklabels=2, cbar_kws={'label': 'Synapses'}, cmap="YlGnBu");
plt.title('Number of Synapses at X-Z coordinates');
plt.figure()
sns.heatmap(sumYZ, xticklabels=10, yticklabels=2, cbar_kws={'label': 'Synapses'}, cmap="YlGnBu");
plt.title('Number of Synapses at Y-Z coordinates');
In [105]:
nvox = 64 * 64 * 48
dfcut['weighted'] = dfcut['synapses'] / dfcut['unmasked'] * nvox
dfthr = dfcut[dfcut['unmasked'] > nvox * 0.5]
k = [0.7, 0.75, 0.8]
for j in k:
dfweg = dfcut[dfcut['unmasked'] > nvox * j] # Thresholded data frame
fig = plt.figure()
ax = fig.add_subplot(111, projection = '3d')
c = 'r'
m = '8'
xs = dfweg['cx']
ys = dfweg['cy']
zs = dfweg['cz']
ax.scatter(xs, zs, ys, c=c, marker = m, s = 0.65)
ax.set_xlabel('X Label')
ax.set_ylabel('Z Label')
ax.set_zlabel('Y Label')
ax.view_init(azim = 35)
fig.show()
In [106]:
for i in ([0.7, 0.75, 0.78, 0.82]):
dfweg = dfcut[dfcut['unmasked'] > nvox * i] # Thresholded data frame
fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(111, projection='3d')
xs = dfweg['cx']
ys = dfweg['cy']
zs = dfweg['cz']
the_fourth_dimension = dfweg['weighted']
colors = cm.hsv(the_fourth_dimension/max(the_fourth_dimension))
colmap = cm.ScalarMappable(cmap=cm.hsv)
colmap.set_array(the_fourth_dimension)
m = '8'
yg = ax.scatter(xs, zs, ys, c = colors, marker = m, s = 5)
cb = fig.colorbar(colmap)
ax.set_xlabel('X Label')
ax.set_ylabel('Z Label')
ax.set_zlabel('Y Label')
ax.view_init(elev = 25, azim = 30)
plt.show()
It basically tell us that the higher synapses accumulated in such Z-layer, not randomly distributed.
In [19]:
sample = dfthr['weighted']
m = 8 # bin number
boundary = np.linspace(100, 500, m + 1)
exp_freq = len(sample) / 10
bin_content = []
for i in boundary:
bin_content = np.append(bin_content, sum(sample <= i))
obe_freq = []
for i in range(0, m + 1):
if i == 0:
obe_freq = np.append(obe_freq, bin_content[i])
if i > 0:
obe_freq = np.append(obe_freq, bin_content[i] - bin_content[i - 1])
observed_values = obe_freq
test_value = stats.chisquare(observed_values, f_exp = exp_freq)[1]
print test_value
Since p-value is nearly 0.0, it strongly rejects the null hypothesis that the new data set follows uniform distribution.
In [32]:
arr_z = []
for i in range(0, 6):
arr_z.append(dfthr[dfthr['cz'] == dfthr['cz'].unique()[i]]['weighted'])
stats.f_oneway(arr_z[0],arr_z[1],arr_z[2],arr_z[3],arr_z[4],arr_z[5])
Out[32]:
Since p-value is nearly 0, it suggests that the mean of synapses are not equal in each Z-layer.
In [33]:
for i, j in zip(range(0, 6), [231, 232, 233, 234, 235, 236]):
ax = plt.subplot(j)
x = arr_z[i]
res = stats.probplot(x, plot = plt)
Check the assumption of ANOVA, it suggests that the synapses in each Z-layer are normally distributed.
In [126]:
scy = sorted(dfthr['cy'].unique())
sy1 = dfthr[(dfthr['cy'] >= scy[0]) & (dfthr['cy'] <= scy[9])]
sy2 = dfthr[(dfthr['cy'] >= scy[9]) & (dfthr['cy'] <= scy[19])]
sy3 = dfthr[(dfthr['cy'] >= scy[19]) & (dfthr['cy'] <= scy[29])]
sy4 = dfthr[(dfthr['cy'] >= scy[29]) & (dfthr['cy'] <= scy[36])]
print(stats.f_oneway(sy1['weighted'], sy2['weighted'], sy3['weighted'], sy4['weighted']))
print
yset = [sy1, sy2, sy3, sy4]
for i in range(0, 4):
print "Variance of layer %d is: %d" % (i+1, np.var(yset[i]['weighted']))
print "Mean of layer %d is: %7d" % (i+1, np.average(yset[i]['weighted']))
Since the p-value are nearly 0, it suggests that the mean synapses are not the same in each Y-layer.
In [46]:
print stats.ttest_ind(sy1['weighted'], sy2['weighted'], equal_var = True)
print stats.ttest_ind(sy1['weighted'], sy3['weighted'], equal_var = True)
print stats.ttest_ind(sy1['weighted'], sy4['weighted'], equal_var = True)
print stats.ttest_ind(sy2['weighted'], sy3['weighted'], equal_var = True)
print stats.ttest_ind(sy2['weighted'], sy4['weighted'], equal_var = True)
print stats.ttest_ind(sy3['weighted'], sy4['weighted'], equal_var = True)
According to the results, it turns out that each Y-layer are significantly different from each other.
In [112]:
out = []
Yvalue = np.unique(dfthr['cy'])
minlen = 10000000
for i in Yvalue:
temp = len(dfthr[dfthr['cy'] == i])
if temp < minlen:
minlen = temp
for i in Yvalue:
if i == 1369:
temp = dfthr[dfthr['cy'] == i]['weighted']
out = np.random.choice(temp, minlen)
else :
temp = dfthr[dfthr['cy'] == i]['weighted']
sample = np.random.choice(temp, minlen)
out = np.column_stack((out, sample))
nclusters = range(1, 6)
bic = np.array(())
for idx in nclusters:
print "Fitting and evaluating model with " + str(idx) + " clusters."
gmm = sklearn.mixture.GMM(n_components=idx, n_iter=1000, covariance_type = 'diag')
gmm.fit(out)
bic = np.append(bic, gmm.bic(out))
plt.figure(figsize=(5, 5))
plt.plot(nclusters, 1.0/bic)
plt.title('BIC')
plt.ylabel('score')
plt.xlabel('number of clusters')
plt.show()
print bic
However, based on the result of clustering, it turns out that Y-layers are identical.
In [107]:
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn import cross_validation
from sklearn.cross_validation import LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier
data = dfthr['weighted']
est = KMeans(n_clusters = 3)
est.fit(data.reshape(len(data),1))
labels = est.labels_
dfthr['pred'] = labels + 1
fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(111, projection='3d')
xs = dfthr['cx']
ys = dfthr['cy']
zs = dfthr['cz']
the_fourth_dimension = dfthr['pred']
colors = cm.hsv(the_fourth_dimension/max(the_fourth_dimension))
colmap = cm.ScalarMappable(cmap=cm.hsv)
colmap.set_array(the_fourth_dimension)
m = '8'
yg = ax.scatter(xs, zs, ys, c = colors, marker = m, s = 5)
cb = fig.colorbar(colmap)
ax.set_xlabel('X Label')
ax.set_ylabel('Z Label')
ax.set_zlabel('Y Label')
ax.view_init(elev = 25, azim = 25)
plt.show()
In [100]:
import statsmodels.api as sm
X = dfthr[['cz','cx','cy']]
y = dfthr['weighted']
X = sm.add_constant(X)
est = sm.OLS(y, X).fit()
print(est.summary())
In [101]:
norm_x = X.values
for i, name in enumerate(X):
if name == "const":
continue
norm_x[:,i] = X[name]/np.linalg.norm(X[name])
norm_xtx = np.dot(norm_x.T, norm_x)
eigs = np.linalg.eigvals(norm_xtx)
condition_number = np.sqrt(eigs.max() / eigs.min())
print(condition_number)
Obviously, the result is very awful. Synapses density cannot be fitted with regression, at least simple linear regression is not applicable.
In [34]:
np.random.seed(12345678)
df = pd.read_csv('../output.csv')
create weighted. Throw out bins with unmasked<=.5*nvox
In [11]:
nvox = 64*64*48
df['weighted'] = df['synapses']/df['unmasked']*nvox
dfthr = df[df['unmasked']>nvox*0.5] # Thresholded data frame
In [12]:
sumXY = pd.pivot_table(dfthr, index='cy', columns='cx', values='synapses', aggfunc=np.sum)
sumXZ = pd.pivot_table(dfthr, index='cz', columns='cx', values='synapses', aggfunc=np.sum)
sumYZ = pd.pivot_table(dfthr, index='cz', columns='cy', values='synapses', aggfunc=np.sum)
plt.figure()
sns.heatmap(sumXY, xticklabels=20, yticklabels=10, cbar_kws={'label': 'Synapses'});
plt.title('Weighted Number of Synapses at X-Y coordinates');
plt.figure()
sns.heatmap(sumXZ, xticklabels=20, yticklabels=2, cbar_kws={'label': 'Synapses'});
plt.title('Weighted Number of Synapses at X-Z coordinates');
plt.figure()
sns.heatmap(sumYZ, xticklabels=10, yticklabels=2, cbar_kws={'label': 'Synapses'});
plt.title('Weighted Number of Synapses at Y-Z coordinates');
In [13]:
sumXY = pd.pivot_table(df, index='cy', columns='cx', values='synapses', aggfunc=np.sum)
sumXZ = pd.pivot_table(df, index='cz', columns='cx', values='synapses', aggfunc=np.sum)
sumYZ = pd.pivot_table(df, index='cz', columns='cy', values='synapses', aggfunc=np.sum)
plt.figure()
sns.heatmap(sumXY, xticklabels=20, yticklabels=10, cbar_kws={'label': 'Synapses'});
plt.title('Number of Synapses at X-Y coordinates');
plt.figure()
sns.heatmap(sumXZ, xticklabels=20, yticklabels=2, cbar_kws={'label': 'Synapses'});
plt.title('Number of Synapses at X-Z coordinates');
plt.figure()
sns.heatmap(sumYZ, xticklabels=10, yticklabels=2, cbar_kws={'label': 'Synapses'});
plt.title('Number of Synapses at Y-Z coordinates');
In [20]:
cxu = dfthr['cx'].unique()
print cxu
dfcut = dfthr[(dfthr['cx']>=331) & (dfthr['cx']<=3841) & (dfthr['cy']<=2905)]
sumXY = pd.pivot_table(dfcut, index='cy', columns='cx', values='synapses', aggfunc=np.sum)
sumXZ = pd.pivot_table(dfcut, index='cz', columns='cx', values='synapses', aggfunc=np.sum)
sumYZ = pd.pivot_table(dfcut, index='cz', columns='cy', values='synapses', aggfunc=np.sum)
plt.figure()
sns.heatmap(sumXY, xticklabels=20, yticklabels=10, cbar_kws={'label': 'Synapses'});
plt.title('Weighted, Edges Removed, X-Y coordinates');
plt.figure()
sns.heatmap(sumXZ, xticklabels=20, yticklabels=2, cbar_kws={'label': 'Synapses'});
plt.title('Weighted Number of Synapses at X-Z coordinates');
plt.figure()
sns.heatmap(sumYZ, xticklabels=10, yticklabels=2, cbar_kws={'label': 'Synapses'});
plt.title('Weighted Number of Synapses at Y-Z coordinates');
In [75]:
print dfcut['weighted'].max(), dfcut['weighted'].min()
colorBinned = np.round(20*(dfcut['weighted'] - dfcut['weighted'].min())/(dfcut['weighted'].max() - dfcut['weighted'].min()))
# print np.unique(colorBinned)
# print np.arange(20)
colors = cm.gray(np.linspace(0, 1, len(colorBinned)))
# print colors[np.arange(21)]
# fig = plt.figure()
# ax = fig.add_subplot(111, projection='3d')
# ax.scatter(np.arange(21),np.arange(21),zs = np.arange(21),c = colors[np.arange(21)])
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(dfcut['cx'],dfcut['cy'],zs=dfcut['cz'],c=colorBinned)
Out[75]:
In [83]:
colorBinned = np.round(20*(dfcut['weighted'] - dfcut['weighted'].min())/(dfcut['weighted'].max() - dfcut['weighted'].min()))
# print np.unique(colorBinned)
# unqColor = np.unique(colorBinned)
for cc in [0,5,10,15,20]:
curDf = dfcut[(colorBinned<=cc) & (colorBinned>cc-5)]
curColor = colorBinned[(colorBinned<=cc) & (colorBinned>cc-5)]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(curDf['cx'],curDf['cy'],zs=curDf['cz'],c=curColor)
print np.unique(curColor)
In [88]:
print 'mean weighted: ', dfcut['weighted'].mean()
(chi2,p) = ss.chisquare(dfcut['weighted'],f_exp=dfcut['weighted'].mean(),ddof=1)
print "chi-squared statistic: ",chi2
print "p-value: ", p
In [112]:
miny = dfcut['cy'].min()
maxy = dfcut['cy'].max()
th0 = miny
th1 = (maxy - miny)/4 + miny
th2 = (maxy - miny)/2 + miny
th3 = (maxy - miny)*3/4 + miny
th4 = maxy
df1 = dfcut[(dfcut['cy']>=th0) & (dfcut['cy']<th1)]
df2 = dfcut[(dfcut['cy']>=th1) & (dfcut['cy']<th2)]
df3 = dfcut[(dfcut['cy']>=th2) & (dfcut['cy']<th3)]
df4 = dfcut[(dfcut['cy']>=th3) & (dfcut['cy']<=th4)]
# whoops ran out of time